package com.hao.crawler;

import com.hao.crawler.parse.Parse;
import java.util.HashSet;
import java.util.Set;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

/**
 * URL crawler.
 * Created by R.hao on 2017/8/8.
 *
 * <p>Starting from a given page, collects the {@code target="_blank"} elements found inside
 * the link container of each page and follows the "next page" link until a stop condition
 * is reached.
 */
public class UrlCrawler implements Crawler {

    /** Exact {@code class} attribute value of the container whose links are collected. */
    private static final String LINK_CONTAINER_CLASS = "Clbc_Game_l_a";

    /** Text used to locate the pagination element (Chinese for "next page"). */
    private static final String NEXT_PAGE_TEXT = "下一页";

    /**
     * Crawling stops as soon as the next-page URL contains this substring.
     * NOTE(review): presumably intended as a page-number cap, but it matches "10" anywhere
     * in the URL (host, path, query) - confirm against the target site's URL scheme.
     */
    private static final String STOP_MARKER = "10";

    /**
     * Crawls {@code url} and every page reachable through successive "next page" links,
     * returning the collected link elements in page order.
     *
     * <p>Pagination ends when any of the following holds:
     * <ul>
     *   <li>the page has no element whose text matches the "next page" label;</li>
     *   <li>that element yields no absolute {@code href};</li>
     *   <li>the next URL contains {@link #STOP_MARKER};</li>
     *   <li>the next URL was already crawled during this call (guards against link cycles).</li>
     * </ul>
     *
     * @param url address of the first page to crawl
     * @return the elements collected from all visited pages; never {@code null}
     * @throws Exception if a page cannot be fetched or the extraction step fails
     */
    @Override
    public Elements run(final String url) throws Exception {
        Elements result = new Elements();
        // Remember every fetched URL so that a pagination loop cannot run forever.
        Set<String> visited = new HashSet<>();
        String current = url;

        // Iterative instead of recursive: stack depth no longer grows with the page count.
        while (visited.add(current)) {
            // Current page
            Document page = Jsoup.connect(current).get();
            result.addAll(extractLinks(page));

            // Next page
            Element nextPage = page.getElementsMatchingText(NEXT_PAGE_TEXT).last();
            if (nextPage == null) {
                break;
            }
            // absUrl returns "" when the element has no resolvable href; connecting to an
            // empty URL would fail, so treat it as the end of pagination.
            String nextUrl = nextPage.absUrl("href");
            if (nextUrl.isEmpty() || nextUrl.contains(STOP_MARKER)) {
                break;
            }
            current = nextUrl;
        }

        return result;
    }

    /**
     * Extracts the {@code target="_blank"} elements located inside the first element whose
     * {@code class} attribute equals {@link #LINK_CONTAINER_CLASS}.
     *
     * @param page the fetched document
     * @return the matching elements, or an empty collection when the container is absent
     * @throws Exception if {@code Parse.doc} fails
     */
    private static Elements extractLinks(final Document page) throws Exception {
        return Parse.doc(page, doc -> {
            Element container =
                    doc.getElementsByAttributeValue("class", LINK_CONTAINER_CLASS).first();
            // first() returns null when nothing matched; avoid a NullPointerException.
            if (container == null) {
                return new Elements();
            }
            return container.getElementsByAttributeValue("target", "_blank");
        });
    }
}
